/*==============================================================================
FILE NAME: Process_NSR.do
INPUTS: NSR_Permits.dta
OUTPUTS: NSR_Permits_Clean.dta
==============================================================================*/

// Turn off the output pause so the script runs straight through
set more off

/* Stand-alone configuration: when running this file on its own, put your
   username in the condition and your root path in rootdir. The branch only
   executes when c(username) matches the string in the condition. */
if c(username) == "" {
    // insert username in the condition above and the root path here
    global rootdir ""
    // Global path to the processed-data folder of the replication package
    global processed_data "$rootdir/processed_data"
}

// Read the raw NSR permits dataset, replacing whatever is in memory
use "$processed_data/NSR_Permits.dta", clear

// Remove entity-name, address, and coordinate columns that are not kept
drop REG_ENT_NAME PHYSICAL_ADDRESS PHYS_LOC_DESCRIPTION ///
     LAT_DEC_COORD_NUM LONG_DEC_COORD_NUM

// Where the city is blank, fall back to NEAR_CIT when it has a value
replace CITY_NAME = NEAR_CIT if NEAR_CIT != "" & CITY_NAME == ""

// Where the ZIP code is blank, fall back to LOC_ZIP when it has a value
replace ZIP_CD = LOC_ZIP if LOC_ZIP != "" & ZIP_CD == ""

// The fallback columns are no longer needed once the blanks are filled
drop NEAR_CIT LOC_ZIP

// Shorter, mixed-case names for the permit number and location columns
rename (PERMIT_NUM CITY_NAME ZIP_CD  TCEQ_REGION COUNTY_NAME) ///
       (PN         City      ZipCode TCEQRegion  County)

// Reduce SIC to its first two characters and store the result as a number.
// substr() needs a string, so SIC is put through tostring first.
// NOTE(review): if SIC arrives as a numeric variable, any leading zero is
// absent from its string form, so the "first two characters" are the first
// two digits actually present -- confirm how SIC is stored upstream.
tostring SIC, replace
generate SIC_first2 = substr(SIC, 1, 2)

// Convert the two-character prefix to numeric before it takes over the name
destring SIC_first2, replace

// Swap the prefix in for the full code (the new SIC sits at the end of the
// variable list, as generate appends new variables)
drop SIC
rename SIC_first2 SIC

// Tag the location and industry columns with an _NSR suffix
local nsr_cols City ZipCode County TCEQRegion SIC
foreach col of local nsr_cols {
    rename `col' `col'_NSR
}

// Permit descriptors get mixed-case names carrying the same _NSR suffix
rename (PERMIT_STATUS     PERMIT_TYPE     PERMIT_NAME) ///
       (Permit_Status_NSR Permit_Type_NSR Permit_Name_NSR)

// Store the ZIP code as a numeric variable
destring ZipCode_NSR, replace

// Constant flag marking every row in this file as an NSR permit
generate NSR = 1

// Write the cleaned dataset, overwriting any earlier version
save "$processed_data/NSR_Permits_Clean.dta", replace